*  DataConstruct.do  [10/12/19]
*  
*  This file constructs the dataset for the analysis of the BC carbon tax (province-year data).
*
*	Start with population data
* 
* Population: import, keep 1981-2016, and build the short province code (ID).
* Requires the global macro root to point at the project directory.
cd "$root/raw_data"
import excel using Population.xlsx, sheet("Data") firstrow
*drop if province == "Canada"
drop if year < 1981
drop if year > 2016

* ID starts as the long province name and is shortened case by case.
* Names not listed below (for example "Canada") keep their long name as ID.
gen ID = province
replace ID = "AB"  if ID == "Alberta"
replace ID = "BC"  if ID == "British Columbia"
replace ID = "MB"  if ID == "Manitoba"
replace ID = "NB"  if ID == "New Brunswick"
replace ID = "NL"  if ID == "Newfoundland and Labrador"
replace ID = "NS"  if ID == "Nova Scotia"
replace ID = "NT"  if ID == "Northwest Territories"
replace ID = "NTN" if ID == "Northwest Territories including Nunavut"
replace ID = "NU"  if ID == "Nunavut"
replace ID = "ON"  if ID == "Ontario"
replace ID = "PE"  if ID == "Prince Edward Island"
replace ID = "QC"  if ID == "Quebec"
replace ID = "SK"  if ID == "Saskatchewan"
replace ID = "YT"  if ID == "Yukon"

label var ID "province name - short"
label var pop "population"
label var province "province name - long"

* Keep province as a string in temp1: it is the merge key for the GDP file.
* Encoding it here and again in the GDP file would give each file its own
* alphabetical numbering, and the two numberings only agree when both files
* contain exactly the same set of province names.
sort province year
save temp1, replace
clear

*
*	Add GDP data
* 
import excel using gdp.xlsx, sheet("Data") firstrow

*drop if province == "Canada"
drop if year < 1981
drop if year > 2016

* Merge on the province name (string) so a row can only match the same province.
sort province year
merge 1:1 province year using temp1
* Show province-years present in only one of the two files before dropping them.
list year province if _merge==1
list year province if _merge==2
drop if _merge !=3
drop _merge

* Encode once, after the merge, so the numeric codes and their value labels
* come from a single consistent list of names.
encode province, gen(pid)
drop province
rename pid province

* gdp is in millions of dollars, so scale by 1,000,000 to get dollars per person.
gen gdppc = gdp/pop*1000000
label var gdp "GDP ($ mill)"
label var gdppc "per capita GDP"


save CanData_temp, replace
clear

*
*	Add commodity prices (one row per year, attached to every province-year)
* 
import excel using "Commodity.xlsx", sheet("Data") firstrow

sort year
save temp1, replace

use CanData_temp
sort year
* m:1 because each year appears once in the commodity file but many times in the panel.
* NOTE(review): years that exist only in the commodity file are kept as rows with
* no province or ID - confirm that is intended.
merge m:1 year using temp1
drop _merge

* The dollar sign is escaped with a backslash. Unescaped, a dollar sign followed
* by digits is read as a global macro reference and the year would vanish from
* the stored label.
label var wood "lumber index (\$2005)"
label var wood07 "lumber index (\$2007)"
label var wti07 "WTI (\$2007)"
order ID year province
save CanData_temp, replace
clear

*
*  Total GHG emissions: build the combined NT + NU series, then merge into the panel
*
import excel "GHG_all.xlsx", sheet("Data") firstrow

* Go wide (one GHG column per ID) so the two territory columns can be added row by row.
sort ID year
reshape wide GHG, i(year) j(ID) string
* From 1999 onward the combined NTN value is the sum of the separate NT and NU values.
replace GHGNTN = GHGNT + GHGNU if year > 1998.5
* Back to one row per ID-year for the merge.
reshape long GHG, i(year) j(ID) string

sort ID year
save "temp1.dta", replace

use "CanData_temp.dta"
sort ID year
merge 1:1 ID year using "temp1.dta"
* Report match results; unmatched rows from either side are kept.
tabulate _merge
drop _merge
label variable GHG "Total GHG emissions from national inventory (kT)"
save "CanData_temp.dta", replace
erase "temp1.dta"
clear

*
*  CO2 emissions: build the combined NT + NU series, then merge into the panel
*
import excel "CO2 Emissions.xlsx", sheet("Data") firstrow

* Go wide (one co2 column per ID) so the two territory columns can be added row by row.
sort ID year
reshape wide co2, i(year) j(ID) string
* From 1999 onward the combined NTN value is the sum of the separate NT and NU values.
replace co2NTN = co2NT + co2NU if year > 1998.5
* Back to one row per ID-year for the merge.
reshape long co2, i(year) j(ID) string

sort ID year
save "temp1.dta", replace

use "CanData_temp.dta"
sort ID year
merge 1:1 ID year using "temp1.dta"
* Report match results; unmatched rows from either side are kept.
tabulate _merge
drop _merge

label variable co2 "Fossil fuel carbon dioxide emissions from national inventory (kT)"
save "CanData_temp.dta", replace
erase "temp1.dta"
clear

*
*  Add employment share data
*    [Data constructed with Employment.do]
*
*merge 1:1 ID year using employees
*drop if year==2017
*drop emp3-emp393
*drop _merge

*
*  Add employment share data
*    [Data constructed with Labor.do]
*  Shares are each sector's employment divided by Total; the raw sector
*  counts are dropped before merging into the panel.

import excel "workers2.xlsx", sheet("Data") firstrow
* Drop the rows coded CA.
drop if ID == "CA"

label variable ID "province name - short"
sort ID year

label variable Total          "total employees"
label variable Goods          "goods producing industries"
label variable Agriculture    "Agriculture"
label variable Forestry       "Forestry, fishing, mining, quarrying, oil and gas"
label variable Utilities      "Utilities"
label variable Construction   "Construction"
label variable Manufacturing  "Manufacturing"
label variable Services       "Services-producing sector"
label variable Trade          "Wholesale and retail trade"
label variable Transportation "Transportation and warehousing"
label variable Finance        "Finance, insurance, real estate, rental and leasing"
label variable Professional   "Professional, scientific and technical services"
label variable Business       "Business, building and other support services"
label variable Education      "Educational services"
label variable Health         "Health care and social assistance"
label variable Information    "Information, culture and recreation"
label variable Accommodation  "Accommodation and food services"
label variable Other          "Other services (except public administration)"
label variable Public         "Public administration"

* Build each share variable from its source sector, in a fixed order.
local shares  Sgoods Sforest  Smanuf        Sservices Strade Strans         Sprof        Saccom        Sotherser Spublic
local sectors Goods  Forestry Manufacturing Services  Trade  Transportation Professional Accommodation Other     Public
local nshares : word count `shares'
forvalues k = 1/`nshares' {
    local share  : word `k' of `shares'
    local sector : word `k' of `sectors'
    generate `share' = `sector'/Total
}

label variable Sgoods    "goods employment share"
label variable Sforest   "forestry, mining, etc share"
label variable Smanuf    "manufacturing share"
label variable Sservices "services employment share"
label variable Strade    "trade employment share"
label variable Strans    "transportation employment share"
label variable Sprof     "professional employment share"
label variable Saccom    "accommodations employment share"
label variable Sotherser "other services employment share"
label variable Spublic   "public administration employment share"

* Quick check of every variable whose name starts with S.
summarize S*
sort ID year
* Drop the raw sector counts (a positional range, so it relies on the column order
* of the spreadsheet) and the columns V and W.
drop Goods-Public V W
save "labor.dta", replace


merge 1:1 ID year using "CanData_temp.dta"
drop if year < 1980.5
drop _merge
save "CanData_temp.dta", replace
clear

*
*  Merge in export price data
*

* Old export price index (2007 = 100).
* The sheet has one row per ID and one v-prefixed column per year.
import excel "export_price_index.xlsx", sheet("Data") firstrow
drop if ID == "CA"
* Reshape to one row per ID-year; the year suffix arrives as a string in year2.
reshape long v, i(ID) j(year2) string
destring year2, generate(year)
drop year2
rename v pexport
save "export.dta", replace

merge 1:1 ID year using "CanData_temp.dta"
drop _merge
save "CanData_temp.dta", replace
clear

* New export price index (2012 = 100).
* NOTE(review): the old-index step drops ID "CA" but this one does not - confirm
* the new file has no such rows.
import excel "export_price_index2.xlsx", sheet("Data") firstrow
* Long by ID-year first, then wide by ID, so that the NT and NU columns sit
* side by side within each year.
reshape long v, i(ID) j(year2) string
reshape wide v, i(year2) j(ID) string
destring year2, generate(year)
drop year2
* From 1999 onward the combined NTN index is the simple average of NT and NU.
replace vNTN = (vNT + vNU)/2 if year >= 1999
reshape long v, i(year) j(ID) string
rename v pexport2

* Separate NT and NU rows are not kept for 1998 and earlier.
drop if inlist(ID, "NT", "NU") & year <= 1998
drop if year > 2016
sort ID year
save "export2.dta", replace


merge 1:1 ID year using "CanData_temp.dta"

drop _merge

* Apply the same territory filter to the rows that came from the main panel.
drop if inlist(ID, "NT", "NU") & year <= 1998


* Attach the Canada-wide gdp and pop of the same year to every province row,
* then drop the Canada rows themselves.
*
* can = 0 for the Canada row and 1 otherwise, so sorting on (can province)
* within year puts the Canada row first in each year.
generate byte can = (ID != "Canada")

* Broadcast strictly within year. A running carry-forward over the whole file
* would leak the previous year's Canada values into any year that has no
* Canada row; here such years get missing values instead.
bysort year (can province): generate gdp_Can = gdp[1] if ID[1] == "Canada"
by year: generate pop_Can = pop[1] if ID[1] == "Canada"

drop if ID == "Canada"
drop can

* Remove the intermediate files written to the raw data directory by earlier steps.
foreach scratch in export export2 labor CanData_temp {
    erase "`scratch'.dta"
}

* Put the identifiers first and write the final dataset to the data directory.
order province ID year
cd "$root/data"
save "CanData.dta", replace

